# Load the business licenses data from the cached .Rda file.
# (The raw CSV import is kept below for reference.)
# bus_licenses <- read_csv(here::here("data", "Business_Licenses.csv"))
bus_licenses <- readRDS(file = here::here("data", "bus_licenses.Rda"))
# To inspect the parsing problems reported for the CSV import:
# problems(bus_licenses)
# The nonstandard zip codes that were flagged look like international
# businesses operating in Chicago, so they are deliberately NOT removed.
# Query used to investigate them:
# bind_cols(bus_licenses[unlist(problems(bus_licenses)[, 'row']), c('LICENSE ID', 'LEGAL NAME', 'DOING BUSINESS AS NAME', 'ADDRESS', 'CITY', 'STATE', 'BUSINESS ACTIVITY')], problems(bus_licenses)[, 'actual']) %>% arrange(ADDRESS) %>% distinct(`DOING BUSINESS AS NAME`, `BUSINESS ACTIVITY`, ADDRESS, CITY, STATE, actual)
In the last decade, underrepresented minority populations in Chicago have been increasingly relegated to lower-income areas in the South and West of the city.
# Load the Census-tract demographics and the 2015 ward boundaries.
# all_demos <- readRDS(here::here("data", "all_demos_chi.Rda"))
all_demos.Chi <- readRDS(here::here("data", "all_demos_chi_proj.Rda"))
wards.2015 <- readRDS(here::here("data", "wards2015_sf.Rda"))

# Faceted map of the predominant racial/ethnic group per Census tract.
# plot intersection of Census tracts with Chicago wards, dropping pieces
# that have no predominant race recorded
ggplot(st_intersection(all_demos.Chi, wards.2015) %>% filter(!is.na(predominant_race))) +
  # color by predominant race based on Census, shaded by percentage of that race
  geom_sf(aes(fill = predominant_race, alpha = max_pct), lwd = 0) +
  scale_fill_dt("diverging", reverse = TRUE) +
  # alpha only encodes intensity, so its legend is suppressed
  scale_alpha(range = c(0.35, 0.9), guide = "none") +
  # outline Chicago wards over data
  geom_sf(data = wards.2015, color = "black", fill = NA) +
  # label wards with the most movement; the three layers differ only in
  # which wards they label and how the labels are nudged/justified
  geom_text_repel(data = wards.2015 %>% filter(ward %in% c(10, 18)),
                  aes(x = long, y = lat, label = ward),
                  fontface = "bold",
                  force = 5,
                  size = 6,
                  direction = "both",
                  hjust = 0) +
  geom_text_repel(data = wards.2015 %>% filter(ward %in% c(9, 13, 14, 33, 34)),
                  aes(x = long, y = lat, label = ward),
                  nudge_x = -.35,
                  segment.size = 0.5,
                  segment.color = get_dt_cols("cocoa"),
                  fontface = "bold",
                  size = 6,
                  force = 5,
                  direction = "both",
                  hjust = 0) +
  geom_text_repel(data = wards.2015 %>% filter(ward %in% c(11)),
                  aes(x = long, y = lat, label = ward),
                  nudge_x = .15,
                  segment.size = 0.5,
                  segment.color = get_dt_cols("cocoa"),
                  fontface = "bold",
                  size = 6,
                  force = 10,
                  direction = "both",
                  hjust = 1) +
  # datum = NA removes the graticule
  coord_sf(datum = NA) +
  theme_map_modest() +
  theme(plot.margin = unit(c(20, 0, 0, 0), "pt"),
        legend.title = element_text(size = 15),
        legend.text = element_text(size = 12),
        # hjust must be numeric (0.5 = centred); it was previously the string "0.5"
        plot.title = element_text(size = 20, face = "bold", hjust = 0.5, margin = margin(t = 15)),
        plot.subtitle = element_text(size = 15, margin = margin(t = 15)),
        plot.caption = element_text(size = 15)) +
  # one panel per value of `id` (presumably the survey year -- confirm)
  facet_wrap( ~ id) +
  labs(
    title = "Latinx Populations in Chicago\nPushed to South, West Neighborhoods",
    subtitle = "Chicago Racial and Ethnic Group Movement\nby Census Tract Since 2012 (5 year averages)",
    caption = "Source: U.S. Census Bureau", fill = "Predominant Race in Tract")
# Load the per-ward, per-date counts.
output_j <- readRDS(file = here::here("data", "j_all_wards_all_dates.Rda"))

# Build every SIDE x WARD x APPLICATION TYPE x count_date combination, join
# the observed rows back on (full_join() is called without `by`, so it joins
# on all shared column names), and derive the week / month / quarter that
# each count_date falls in.
# NOTE(review): `origin` is handed to cut(), which appears to ignore it;
# it may have been meant for as_date() -- confirm before changing.
allDatesCount.df <- output_j %>%
  expand(SIDE, WARD, `APPLICATION TYPE`, count_date) %>%
  full_join(output_j) %>%
  arrange(SIDE, WARD, `APPLICATION TYPE`, count_date) %>%
  mutate(
    activity_wk = lubridate::as_date(
      cut(
        count_date,
        breaks = "week",
        start.on.monday = FALSE,
        origin = lubridate::origin
      )
    ),
    activity_month = lubridate::as_date(
      cut(
        count_date,
        breaks = "month",
        start.on.monday = FALSE,
        origin = lubridate::origin
      )
    ),
    activity_qtr = lubridate::as_date(
      cut(
        count_date,
        breaks = "quarter",
        start.on.monday = FALSE,
        origin = lubridate::origin
      )
    )
  )
## Joining, by = c("SIDE", "WARD", "APPLICATION TYPE", "count_date")
# Stacked bar chart of issued/renewed licenses per quarter, one segment per
# Chicago Side, with each segment labelled by its count.
# NOTE(review): sum() has no na.rm, so any NA in active_businesses makes the
# whole SIDE/quarter total NA -- confirm the joined data has no gaps.
allDatesCount.df %>%
  filter(`APPLICATION TYPE` %in% c("ISSUE", "RENEW")) %>%
  group_by(SIDE, activity_qtr) %>%
  summarise(active_businesses = sum(active_businesses)) %>%
  arrange(activity_qtr, desc(active_businesses)) %>%
  ggplot(aes(x = activity_qtr, y = active_businesses, group = SIDE)) +
  # x, y and group are inherited from the plot-level mapping
  geom_bar(aes(fill = SIDE), stat = "identity") +
  scale_fill_dt("main") +
  # centre each label inside its stacked segment
  geom_text(
    aes(label = active_businesses),
    size = 4.5,
    position = position_stack(vjust = 0.5),
    color = "white"
  ) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_date(
    date_labels = "%b %y",
    date_breaks = "1 year",
    limits = c(ymd("2012-01-01"), ymd("2018-12-31"))
  ) +
  theme_modest() +
  theme(
    legend.position = c(0.5, 0.9),
    legend.direction = "horizontal",
    axis.title.x = element_blank(),
    plot.margin = unit(c(2, 2, 2, 2), "cm")
  ) +
  labs(
    y = "Number of Business Licenses Issued or Renewed",
    colour = "Chicago Council Ward",
    caption = "Data Source: Chicago Open Data Portal",
    title = "Business Activity Stagnant in Northwest, Far Southwest, Far Southeast",
    subtitle = "Minimal New and Renewing Businesses Since 2012 in Three Chicago Areas",
    fill = "Chicago Area"
  )
# Register the Google Maps API key (read from the environment) with ggmap.
register_google(key = Sys.getenv("GOOGLEMAPS_KEY"))
# read in Chicago ward geos in ggmap-compatible format
wards.shp.2015 <- here::here("data","Geofiles - Chicago Zip Code and Neighborhood","Boundaries - Wards (2015-)","geo_export_0bb2e9fd-20ca-415b-a96a-7722d72c1b41.shp")
wards2015 <- shapefile(wards.shp.2015)
# give every ward an explicit `id` (its row name) to fortify and join on
wards2015@data <- mutate(wards2015@data, id = rownames(wards2015@data))
# flatten the polygons into one row per boundary vertex
wards2015.points <- fortify(wards2015, region="id")
# attach the ward attributes to each vertex
wards2015.df <- merge(wards2015.points, wards2015@data, by="id")
# merge() does not guarantee the row order within each id, but geom_path() /
# geom_polygon() connect vertices in row order; restore the drawing sequence
# recorded by fortify() so ward outlines cannot be scrambled.
if ("order" %in% names(wards2015.df)) {
  wards2015.df <- wards2015.df[order(wards2015.df$order), ]
}
# Display names for the two application types (used as facet labels).
license_types <- c(
  ISSUE = "New Business Licenses",
  RENEW = "Business License Renewals"
)
# Keep active issuances and renewals from 2012 onward, reduced to one row per
# license ID / year / application type / coordinate pair so that a business
# is counted only once per year.
bl <- bus_licenses %>%
  filter(
    activity_yr >= 2012,
    active == 1,
    `APPLICATION TYPE` %in% names(license_types)
  ) %>%
  distinct(
    `LICENSE ID`,
    activity_yr,
    `APPLICATION TYPE`,
    LONGITUDE,
    LATITUDE
  )
# Map of license locations over a Google terrain basemap, one panel per
# application type, with the 2015 ward boundaries drawn on top.
# pull in Chicago terrain map from Google Maps
ggmap::ggmap(ggmap::get_googlemap(center = c(lon = -87.732125, lat = 41.83379),
                                  zoom = 10, scale = 1,
                                  maptype = "terrain",
                                  color = "color",
                                  key = Sys.getenv("GOOGLEMAPS_KEY"))) +
  # layer licenses over each other at extremely low alpha so that density
  # shows up as colour saturation
  geom_point(data = bl, aes(x = LONGITUDE, y = LATITUDE, color = as.factor(activity_yr)),
             alpha = 0.05, show.legend = FALSE, na.rm = TRUE) +
  # add Chicago Ward boundaries
  geom_path(data = wards2015.df, aes(long, lat, group = group), color = "black") +
  geom_polygon(data = wards2015.df, aes(long, lat, group = group), fill = NA) +
  scale_color_dt("desert") +
  # NOTE(review): the subtitle says "since 2009" but `bl` is filtered to
  # activity_yr >= 2012 -- confirm the intended wording.
  labs(y = "Latitude", x = "Longitude", colour = "Year", title = "Money in the Middle",
       subtitle = "Virtually No New Business Entry, Renewal in Chicago Wards 9, 10, 18, or 41 since\n2009",
       caption = "Data Source: City of Chicago Department of Business Affairs and Consumer Protection"
  ) +
  theme_modest() +
  # one panel per application type, titled via license_types
  facet_grid(. ~ `APPLICATION TYPE`, labeller = as_labeller(license_types)) +
  theme(
    panel.border = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # hjust must be numeric (0.5 = centred); it was previously the string "0.5"
    plot.title = element_text(face = "bold", hjust = 0.5, margin = margin(t = 15)),
    plot.subtitle = element_text(hjust = 0.5, margin = margin(t = 20))
  ) +
  # zoom graph to outer ward boundaries: pad 0.1 degrees on the west/south
  # edges and 0.05 degrees on the east/north edges. (The y limits previously
  # had the "+ 0.05" outside c(), which shifted BOTH bounds by 0.05.)
  scale_x_continuous(limits = c(min(wards2015.df$long) - 0.1, max(wards2015.df$long) + 0.05),
                     expand = c(0, 0)) +
  scale_y_continuous(limits = c(min(wards2015.df$lat) - 0.1, max(wards2015.df$lat) + 0.05),
                     expand = c(0, 0))
# Monthly count of active license records per ward since 2012.
# Computed once so the boxplots and their labels are based on the same data
# (the labels were previously computed without the date filter).
ward_monthly_counts <- bus_licenses %>%
  filter(!is.na(WARD), active == 1, activity_date > mdy("1/1/2012")) %>%
  group_by(activity_month, activity_yr, WARD, SIDE) %>%
  summarise(business_count = n()) %>%
  ungroup()

# Median monthly count per ward, used to position the ward labels.
ward_median_counts <- ward_monthly_counts %>%
  group_by(WARD) %>%
  summarise(median_issuances = median(business_count)) %>%
  arrange(median_issuances)

# plot boxplot of monthly counts for each ward, ordered by ward median
ggplot(ward_monthly_counts,
       aes(x = reorder(WARD, business_count, FUN = median), y = business_count)) +
  # color by Chicago Side
  geom_boxplot(aes(group = as.factor(WARD), fill = as.factor(SIDE)), alpha = 0.5) +
  scale_fill_dt("diverging") +
  # label each box with its ward number, 50 units above the ward median
  geom_text(data = ward_median_counts,
            aes(x = as.factor(WARD), y = median_issuances + 50, group = as.factor(WARD),
                label = as.factor(WARD)),
            color = "#6E2C49", fontface = "bold", size = 6) +
  # limit scale to 750, (one Loop ward's outliers extend ~1000 above other wards)
  # still very clearly the highest even without all outliers visible
  scale_y_continuous(limits = c(0, 750)) +
  labs(x="Chicago Council Ward", y="Average Monthly Business Count", caption="Data Source: Chicago Open Data Portal", title="Least New & Surviving Businesses\nin South, West Wards for 15+ Years", subtitle="Calumet Heights, Westlawn Average less than 25 Monthly\nBusiness License Issuances",fill="Chicago Area") +
  theme_modest() +
  theme(
    plot.margin = unit(c(20, 0, 0, 0), "pt"),
    panel.grid.major.x = element_blank(),
    axis.ticks = element_blank(),
    # ward numbers are drawn as labels on the boxes, so the axis text is hidden
    axis.text.x = element_blank(),
    # hjust must be numeric (0.5 = centred); it was previously the string "0.5"
    plot.title = element_text(face = "bold", hjust = 0.5, margin = margin(t = 15)),
    plot.subtitle = element_text(margin = margin(t = 15))
  )
# Load turnout (and change in turnout) by Chicago Side and by ward.
turnoutSides <- readRDS(file = here::here("data", "sides_turnout.Rda"))
turnoutDiff <- readRDS(file = here::here("data", "wards_turnout.Rda"))

# Slope chart: one grey line per Side connecting its turnout in each
# election year, with the individual wards drawn faintly behind for context.
ggplot(
  turnoutSides,
  aes(x = as.factor(YEAR), y = TURNOUT, group = as.factor(SIDE))
) +
  # Side-level lines; the fixed grey colour takes precedence over the mapping
  geom_line(
    aes(colour = SIDE),
    size = 1.5,
    alpha = 0.75,
    color = "grey",
    show.legend = FALSE
  ) +
  # geom_line(data = filter(turnoutSides, (DIFFERENCE > -0)||(is.na(DIFFERENCE))), aes(colour=SIDE), size=1.5, show.legend=FALSE) +
  scale_color_dt("mixed", reverse = TRUE) +
  # ward-level lines, coloured by Side, to show the spread within each Side
  geom_line(
    data = turnoutDiff,
    aes(group = as.factor(WARD), color = SIDE),
    size = 0.5,
    alpha = 0.25,
    show.legend = FALSE
  ) +
  # print the mean ward turnout (%) for each Side at each year
  geom_label(
    data = turnoutDiff %>%
      group_by(YEAR, SIDE) %>%
      summarise(MEAN_TURNOUT = round(mean(TURNOUT, na.rm = TRUE), 1)),
    aes(
      x = as.factor(YEAR),
      y = MEAN_TURNOUT,
      label = paste0(MEAN_TURNOUT, "%"),
      group = as.factor(SIDE)
    ),
    color = "darkgray",
    label.padding = unit(0.05, "lines"),
    label.size = 0.0,
    fontface = "bold",
    size = 4
  ) +
  # name each Side next to its 2011 point (nudged left) ...
  geom_label_repel(
    data = turnoutSides %>% filter(YEAR == 2011),
    aes(label = paste0(SIDE), color = SIDE),
    fill = NA,
    hjust = "left",
    nudge_x = -.25,
    force = 5,
    direction = "both",
    fontface = "bold",
    point.padding = 3,
    size = 5,
    show.legend = FALSE
  ) +
  # ... and next to its 2015 point (nudged right)
  geom_label_repel(
    data = turnoutSides %>% filter(YEAR == 2015),
    aes(label = paste0(SIDE), color = SIDE),
    fill = NA,
    hjust = "right",
    nudge_x = .25,
    force = 7.5,
    direction = "both",
    fontface = "bold",
    point.padding = 3,
    size = 5,
    show.legend = FALSE
  ) +
  # put the year labels at the top of the graph
  scale_x_discrete(position = "top") +
  # coord_cartesian(ylim=c(23.5, 60)) +
  theme_modest() +
  theme(
    axis.text.x.top = element_text(size = rel(1.25), vjust = -8, face = "bold"),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  ) +
  labs(
    title = "Highest Voter Turnout In Wards Facing Deinvestment",
    subtitle = "Change in Voter Turnout Between 2011 and 2015 Chicago City Council Elections",
    caption = "Source: Chicago Board of Election Commissioners",
    color = "Chicago Area"
  )